Importazione dati e creazione grafo
# Lettura e pulizia dati collaborazioni
df <- read.csv("Collaborazioni_finale_oggi.csv", stringsAsFactors = FALSE)
head(df)
## main_artist featured_artist track_name release_year
## 1 Sfera Ebbasta Shiva SNTMNG 2025
## 2 Sfera Ebbasta Shiva NON METTERCI BECCO 2025
## 3 Sfera Ebbasta Shiva SEI PERSA 2025
## 4 Sfera Ebbasta Shiva MOLECOLE SPRITE 2025
## 5 Sfera Ebbasta Shiva MAYBACH 2025
## 6 Sfera Ebbasta Shiva NEON 2025
df_clean <- df %>%
filter(main_artist != featured_artist) %>%
distinct(main_artist, featured_artist, track_name, release_year, .keep_all = TRUE)
cat("Numero righe dopo pulizia:", nrow(df_clean), "\n")
## Numero righe dopo pulizia: 3108
write.csv(df_clean, "Collaborazioni_finale_oggi_clean.csv", row.names = FALSE)
# Lettura artisti e preparazione grafo
artist <- read.csv("artisti.csv") %>% select(-id)
feat <- read_csv("Collaborazioni_finale_oggi_clean.csv")
## Rows: 3108 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): main_artist, featured_artist, track_name
## dbl (1): release_year
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
colnames(feat)[1:2] <- c("from", "to")
# Normalizzazione nomi
artist$artist <- str_trim(tolower(artist$artist))
feat$from <- str_trim(tolower(feat$from))
feat$to <- str_trim(tolower(feat$to))
# Filtro collaborazioni valide e creazione grafo
feat_clean <- feat %>%
filter(from %in% artist$artist & to %in% artist$artist)
g <- graph_from_data_frame(feat_clean, directed = TRUE, vertices = artist)
E(g)$release_year <- feat_clean$release_year
cat("Numero nodi:", length(V(g)), "\n")
## Numero nodi: 200
cat("Numero archi:", length(E(g)), "\n")
## Numero archi: 3108
cat("Densità :", round(edge_density(g), 4), "\n")
## Densità : 0.0781
vertex_attr_names(g)
## [1] "name" "genres"
edge_attr_names(g)
## [1] "track_name" "release_year"
Funzione per analisi temporali
# Funzione generale per analisi per anno
analyze_by_year <- function(g, years = 2009:2024, metric_func) {
results <- data.frame()
for (yr in years) {
edges_year <- E(g)[E(g)$release_year == yr]
if (length(edges_year) > 0) {
g_year <- subgraph.edges(g, edges_year, delete.vertices = TRUE)
metric_val <- metric_func(g_year)
} else {
metric_val <- NA
}
results <- rbind(results, data.frame(anno = yr, valore = metric_val))
}
return(results)
}
# Funzione per grafici temporali
create_temporal_plot <- function(data, title, ylabel, as_percentage = FALSE) {
if (as_percentage) {
data <- data %>%
mutate(
valore_pct = valore * 100,
label = ifelse(!is.na(valore), paste0(round(valore * 100, 1), "%"), "")
)
p <- ggplot(data, aes(x = anno, y = valore_pct))
} else {
p <- ggplot(data, aes(x = anno, y = valore))
}
p + geom_area(fill = "#1cd463", alpha = 0.6) +
geom_line(color = "#3d811c", size = 1.2) +
geom_point(color = "#00441b", size = 3) +
labs(title = title, x = "Anno", y = ylabel) +
{if (as_percentage) scale_y_continuous(labels = function(x) paste0(x, "%"))} +
theme_minimal(base_size = 15)
}
Analisi di reciprocità del grafo
recip_data <- analyze_by_year(g, 2009:2024, function(g_year) {
if (ecount(g_year) > 0) reciprocity(g_year) else NA
})
## Warning: `subgraph.edges()` was deprecated in igraph 2.1.0.
## ℹ Please use `subgraph_from_edges()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
create_temporal_plot(recip_data, "Evoluzione della Reciprocità ", "Reciprocità (%)", TRUE)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

TransitivitÃ
trans_data <- analyze_by_year(g, 2009:2024, function(g_year) {
g_und <- as.undirected(g_year, mode = "collapse")
tryCatch(transitivity(g_und, type = "global"), error = function(e) NA)
})
## Warning: `as.undirected()` was deprecated in igraph 2.1.0.
## ℹ Please use `as_undirected()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
create_temporal_plot(trans_data, "Evoluzione della Transitività ", "Transitività (%)", TRUE)

Paradosso dell’amico
deg <- degree(g, mode = "all")
neighbor_deg_mean <- knn(g, mode = "all")$knn
paradox_df <- data.frame(
artist = V(g)$name,
degree = deg,
neighbor_degree_mean = neighbor_deg_mean,
diff = neighbor_deg_mean - deg
)
paradox_value <- mean(paradox_df$neighbor_degree_mean > paradox_df$degree, na.rm = TRUE)
paradox_data <- data.frame(
condizione = c("Vale il paradosso", "Non vale il paradosso"),
valore = c(paradox_value, 1 - paradox_value)
) %>%
mutate(
percentuale = paste0(round(valore * 100, 1), "%"),
ypos = cumsum(valore) - 0.5 * valore
)
ggplot(paradox_data, aes(x = "", y = valore, fill = condizione)) +
geom_bar(stat = "identity", width = 1, color = "white") +
coord_polar("y") +
geom_text(aes(y = ypos, label = percentuale), color = "white", size = 5) +
scale_fill_manual(values = c("Vale il paradosso" = "#1cd463",
"Non vale il paradosso" = "#3d811c")) +
labs(title = "Paradosso dell'amico tra gli artisti") +
theme_void() +
theme(legend.title = element_blank())

Analisi di assortatività per grado
# Assortatività globale per grado
g_sub <- subgraph_from_edges(g, E(g)[release_year >= 2009 & release_year <= 2024],
delete.vertices = TRUE)
assort_degree <- assortativity_degree(g_sub)
cat("Assortatività per grado:", round(assort_degree, 3), "\n")
## Assortatività per grado: 0.364
# Assortatività per anno
assort_data <- analyze_by_year(g, 2009:2024, function(g_year) {
if (ecount(g_year) > 0 && vcount(g_year) > 1) {
assortativity_degree(g_year)
} else {
NA
}
})
ggplot(assort_data, aes(x = anno, y = valore)) +
geom_line(color = "#1cd463", linewidth = 1.2) +
geom_point(color = "#1cd463", size = 2) +
geom_smooth(method = "loess", se = FALSE, color = "gray40", linetype = "dashed") +
labs(title = "Evoluzione dell'assortatività per grado",
x = "Anno", y = "Indice di assortatività ") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

Collaborazioni intra-genere nel tempo
# Analisi collaborazioni intra vs inter genere
edges_df <- igraph::as_data_frame(g, what = "edges") %>%
mutate(
from_genre = V(g)$genres[match(from, V(g)$name)],
to_genre = V(g)$genres[match(to, V(g)$name)],
same_genre = from_genre == to_genre,
year = E(g)$release_year
)
intra_genre_by_year <- edges_df %>%
filter(year >= 2009 & year <= 2024) %>%
group_by(year) %>%
summarise(
intra = sum(same_genre, na.rm = TRUE),
inter = sum(!same_genre, na.rm = TRUE)
) %>%
pivot_longer(cols = c("intra", "inter"), names_to = "type", values_to = "count")
ggplot(intra_genre_by_year, aes(x = year, y = count, color = type)) +
geom_line(linewidth = 1.2) +
geom_point(size = 2) +
theme_minimal() +
scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
scale_color_manual(values = c("intra" = "#1cd463", "inter" = "#3d811c")) +
labs(title = "Collaborazioni intra- vs inter- genere",
x = "Anno", y = "Numero di collaborazioni", color = "Tipo di ft")

Analisi temporale: numero di collaborazioni per anno
year_table <- as.data.frame(table(E(g)$release_year)) %>%
setNames(c("year", "collaborations")) %>%
mutate(year = as.numeric(as.character(year))) %>%
filter(year >= 2009 & year <= 2024) %>%
arrange(year)
ggplot(year_table, aes(x = year, y = collaborations)) +
geom_line(color = "#1cd463", linewidth = 1.2) +
geom_point(color = "#1cd463", size = 2) +
theme_minimal() +
scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
labs(title = "Evoluzione dei ft nel tempo",
x = "Anno", y = "Numero di collaborazioni")

Visualizzazione crescita del grafo
df_growth <- read_csv("collaborazioni_finale_oggi_clean.csv") %>%
filter(release_year >= 2009) %>%
distinct(main_artist, featured_artist, release_year)
## Rows: 3108 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): main_artist, featured_artist, track_name
## dbl (1): release_year
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
years <- sort(unique(df_growth$release_year))
g_full <- graph_from_data_frame(df_growth %>% select(main_artist, featured_artist),
directed = TRUE)
# Layout fisso per l'animazione
lay <- layout_with_kk(g_full)
lay_df <- as.data.frame(lay) %>%
setNames(c("x", "y")) %>%
mutate(node = V(g_full)$name)
# Preparazione dati per animazione
nodes_list <- list()
edges_list <- list()
for (year in years) {
temp_edges <- df_growth %>%
filter(release_year <= year) %>%
select(main_artist, featured_artist)
g_year <- graph_from_data_frame(temp_edges, directed = TRUE,
vertices = V(g_full)$name)
active_nodes <- V(g_year)$name
nodes_list[[as.character(year)]] <- lay_df %>%
filter(node %in% active_nodes) %>%
mutate(year = year)
edges_df <- igraph::as_data_frame(g_year, what = "edges") %>%
left_join(lay_df, by = c("from" = "node")) %>%
left_join(lay_df, by = c("to" = "node"), suffix = c("", "_end")) %>%
mutate(year = year)
edges_list[[as.character(year)]] <- edges_df
}
nodes_all <- bind_rows(nodes_list)
edges_all <- bind_rows(edges_list)
p <- ggplot() +
geom_segment(data = edges_all,
aes(x = x, y = y, xend = x_end, yend = y_end),
alpha = 0.3, color = "#676b65") +
geom_point(data = nodes_all, aes(x = x, y = y),
size = 2, color = "#1cd463") +
theme_void() +
ggtitle("Anno: {closest_state}") +
transition_states(year, transition_length = 2, state_length = 1) +
ease_aes("cubic-in-out")
animate(p, nframes = length(years) * 3, fps = 10, width = 800, height = 600,
renderer = gifski_renderer("network_evolution.gif"))

Heatmap generi vs comunitÃ
membership_girvan <- membership(girvan_comm)
# Analisi distribuzione generi per comunitÃ
df_gen_comm <- data.frame(
artist = names(membership_girvan),
community = as.factor(membership_girvan),
genre = V(g_undirected)$genres[match(names(membership_girvan), V(g_undirected)$name)]
) %>%
filter(!is.na(genre))
# Top 3 comunità per dimensione
top_3_communities <- df_gen_comm %>%
count(community) %>%
arrange(desc(n)) %>%
slice_head(n = 3) %>%
pull(community)
genre_community_prop <- df_gen_comm %>%
filter(community %in% top_3_communities) %>%
count(genre, community) %>%
group_by(community) %>%
mutate(proportion = n / sum(n)) %>%
ungroup() %>%
select(genre, community, proportion) %>%
pivot_wider(names_from = community, values_from = proportion, values_fill = 0) %>%
pivot_longer(-genre, names_to = "Community", values_to = "Proportion") %>%
rename(Genre = genre)
ggplot(genre_community_prop, aes(x = Community, y = Genre, fill = Proportion)) +
geom_tile(color = "white") +
scale_fill_gradient(low = "white", high = "#1cd463") +
labs(title = "Distribuzione dei generi per comunità ",
x = "Comunità ", y = "Genere", fill = "Proporzione") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid = element_blank())
